This Python Jupyter notebook can parse watercare bills and extract the usage data. It can be used to generate plots of the usage data.

You can download this notebook from https://github.com/neon-ninja/watercare/blob/main/parse.ipynb

You can download PDFs from https://myaccount.watercare.co.nz/bills-and-payments. Set your start date filter as far back as it'll go, and download all the PDFs into a folder called pdfs. Here's a JS snippet you can put in your browser console to save you clicking them all:

var links = document.querySelectorAll('button.flex.hover\\:bg-blue-100')
console.log(`Found ${links.length} links`)
var index = 0
setInterval(function() {
    if (index >= links.length) {
        return
    }
    console.log(index)
    links[index].click()
    index++
}, 250)
In [1]:
import pandas as pd # Tabular data
pd.options.plotting.backend = "plotly"
import plotly.express as px # Plotting
from glob import glob # Finding files
import pdftotext # PDF parsing
from tqdm.auto import tqdm # Progress bars
import re # Regular expressions
from datetime import datetime
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

files = glob("pdfs/*.pdf")
len(files)
Out[1]:
64
In [2]:
def read_PDF(filename):
    with open(filename, "rb") as f:
        pdf = pdftotext.PDF(f)
        text = ""
        for page_text in pdf:
            text += page_text.strip()
        return text

def parse_number(amount):
    if amount.endswith(" cr"):
        return -float(amount[:-3])
    return float(amount)

def parse_text(text):
    if "$500 gift card" in text:
        # This breaks the dollar_amounts regex
        text = text.replace("$ 50", "")
    dates = re.findall(r'(\d{2} [A-Z][a-z]{2} \d{4})', text)
    dollar_amounts = [parse_number(d) for d in re.findall(r'\$ ([\d\.]+(?: cr)?)', text)]
    usage = [parse_number(d) for d in re.findall(r'([\d\.]+) kL', text)]
    unit_rates = [parse_number(d) for d in re.findall(r'\$([\d\.]+)/kL', text)]
    if len(unit_rates) == 4:
        # Rate changed
        unit_rates = unit_rates[::2]

    consumption_period = re.search(r"Consumption period (\d+) days", text).group(1)
    this_reading_match = re.search(r"This reading\s+(\d{2}-\w{3}-\d{2})\s+(\d+)\s*(Estimate|Actual)?", text)
    this_reading_date = this_reading_match.group(1)
    this_reading_date = datetime.strptime(this_reading_date, "%d-%b-%y")
    this_reading_value = int(this_reading_match.group(2))
    this_reading_type = this_reading_match.group(3)

    return {
        "Invoice date": dates[0],
        "Due date": dates[1],
        "Total due ($)": dollar_amounts[0],
        "Opening balance ($)": dollar_amounts[1],
        "Payments received ($)": dollar_amounts[2],
        "Balance still owing ($)": dollar_amounts[3],
        "Water consumption ($)": dollar_amounts[4],
        "Wastewater consumption ($)": dollar_amounts[5],
        "Wastewater fixed ($)": dollar_amounts[6],
        "Balance of current charges ($)": dollar_amounts[7],
        "GST ($)": dollar_amounts[8],
        "Consumption period (days)": int(consumption_period),
        "Reading date": this_reading_date,
        "Reading value (kL)": this_reading_value,
        "Reading type": this_reading_type,
        "Water consumption (kL)": usage[0],
        "Wastewater consumption (kL)": usage[1],
        "Wastewater rate (%)": float(re.search(r"@(\d+.\d+)%", text).group(1)),
        "Water unit rate ($/kL)": unit_rates[0],
        "Wastewater unit rate ($/kL)": unit_rates[1],
    }

results = []
for f in tqdm(files):
    text = read_PDF(f)
    results.append(parse_text(text))
df = pd.DataFrame(results)
for col in ['Invoice date', 'Due date', 'Reading date']:
    df[col] = pd.to_datetime(df[col])
df.sort_values("Invoice date", inplace=True, ascending=False)
df
  0%|          | 0/64 [00:00<?, ?it/s]
Out[2]:
Invoice date Due date Total due ($) Opening balance ($) Payments received ($) Balance still owing ($) Water consumption ($) Wastewater consumption ($) Wastewater fixed ($) Balance of current charges ($) GST ($) Consumption period (days) Reading date Reading value (kL) Reading type Water consumption (kL) Wastewater consumption (kL) Wastewater rate (%) Water unit rate ($/kL) Wastewater unit rate ($/kL)
5 2024-09-06 2024-09-27 484.90 40.24 -40.24 0.00 192.78 263.24 28.88 484.90 63.25 34 2024-09-03 1200 Actual 90.00 70.65 78.5 2.142 3.726
2 2024-07-31 2024-08-21 40.24 -2.96 0.00 -2.96 8.57 11.70 22.93 43.20 5.63 27 2024-07-31 1110 Estimate 4.00 3.14 78.5 2.142 3.726
28 2024-07-05 2024-07-26 0.00 -54.22 0.00 -54.22 9.99 13.63 27.64 51.26 6.69 35 2024-07-04 1106 Actual 5.00 3.92 78.5 1.998 3.476
13 2024-05-30 2024-06-20 0.00 -86.57 0.00 -86.57 5.99 8.20 18.16 32.35 4.22 23 2024-05-30 1101 Estimate 3.00 2.36 78.5 1.998 3.476
54 2024-05-08 2024-05-29 0.00 45.73 -45.73 0.00 -49.95 -68.20 31.58 -86.57 -11.29 40 2024-05-07 1098 Actual 25.00 19.62 78.5 1.998 3.476
57 2024-03-28 2024-04-18 45.73 74.91 -74.91 0.00 11.99 16.37 17.37 45.73 5.96 22 2024-03-28 1123 Estimate 6.00 4.71 78.5 1.998 3.476
61 2024-03-07 2024-03-28 74.91 67.79 -67.79 0.00 19.98 27.29 27.64 74.91 9.77 35 2024-03-06 1117 Estimate 10.00 7.85 78.5 1.998 3.476
3 2024-01-31 2024-02-21 67.79 66.21 -66.21 0.00 17.98 24.54 25.27 67.79 8.84 32 2024-01-31 1107 Estimate 9.00 7.06 78.5 1.998 3.476
40 2024-01-03 2024-01-24 66.21 59.92 -59.92 0.00 17.98 24.54 23.69 66.21 8.64 30 2023-12-30 1098 Estimate 9.00 7.06 78.5 1.998 3.476
0 2023-11-30 2023-12-21 59.92 74.12 -74.12 0.00 15.98 21.83 22.11 59.92 7.82 28 2023-11-30 1089 Estimate 8.00 6.28 78.5 1.998 3.476
48 2023-11-03 2023-11-24 74.12 60.71 -60.71 0.00 19.98 27.29 26.85 74.12 9.67 34 2023-11-02 1081 Estimate 10.00 7.85 78.5 1.998 3.476
60 2023-09-29 2023-10-20 60.71 57.59 -57.59 0.00 15.98 21.83 22.90 60.71 7.92 29 2023-09-29 1071 Estimate 8.00 6.28 78.5 1.998 3.476
9 2023-09-01 2023-09-22 57.59 70.65 -70.65 0.00 13.99 19.12 24.48 57.59 7.51 31 2023-08-31 1063 Actual 7.00 5.50 78.5 1.998 3.476
58 2023-07-31 2023-08-21 70.65 60.53 -60.53 0.00 20.08 27.42 23.15 70.65 9.22 32 2023-07-31 1056 Estimate 11.00 8.64 78.5 1.825 3.174
47 2023-07-03 2023-07-24 60.53 74.22 -74.22 0.00 16.42 22.41 21.70 60.53 7.90 30 2023-06-29 1045 Actual 9.00 7.06 78.5 1.825 3.174
38 2023-05-30 2023-06-20 74.22 85.75 -85.75 0.00 21.90 29.90 22.42 74.22 9.68 31 2023-05-30 1036 Estimate 12.00 9.42 78.5 1.825 3.174
52 2023-05-01 2023-05-22 85.75 61.25 -61.25 0.00 27.38 37.39 20.98 85.75 11.18 29 2023-04-29 1024 Actual 15.00 11.78 78.5 1.825 3.174
62 2023-03-31 2023-04-21 61.25 37.52 -37.52 0.00 16.42 22.41 22.42 61.25 7.99 31 2023-03-31 1009 Estimate 9.00 7.06 78.5 1.825 3.174
11 2023-03-01 2023-03-22 37.52 86.47 -86.47 0.00 7.30 9.97 20.25 37.52 4.89 28 2023-02-28 1000 Actual 4.00 3.14 78.5 1.825 3.174
19 2023-01-31 2023-02-21 86.47 66.31 -66.31 0.00 25.55 34.88 26.04 86.47 11.28 36 2023-01-31 996 Estimate 14.00 10.99 78.5 1.825 3.174
51 2023-01-04 2023-01-25 66.31 63.42 -63.42 0.00 20.08 27.42 18.81 66.31 8.65 26 2022-12-26 982 Actual 11.00 8.64 78.5 1.825 3.174
21 2022-11-30 2022-12-21 63.42 71.37 -71.37 0.00 18.25 24.92 20.25 63.42 8.27 28 2022-11-30 971 Estimate 10.00 7.85 78.5 1.825 3.174
42 2022-11-03 2022-11-24 71.37 68.48 -68.48 0.00 20.08 27.42 23.87 71.37 9.31 33 2022-11-02 961 Actual 11.00 8.64 78.5 1.825 3.174
45 2022-09-30 2022-10-21 68.48 54.83 -54.83 0.00 20.08 27.42 20.98 68.48 8.93 29 2022-09-30 950 Estimate 11.00 8.64 78.5 1.825 3.174
43 2022-09-02 2022-09-23 54.83 77.79 -77.79 0.00 12.78 17.46 24.59 54.83 7.15 34 2022-09-01 939 Actual 7.00 5.50 78.5 1.825 3.174
24 2022-07-29 2022-08-19 77.79 116.49 -116.49 0.00 25.55 34.88 17.36 77.79 10.15 24 2022-07-29 932 Estimate 14.00 10.99 78.5 1.825 3.174
41 2022-07-06 2022-07-27 116.49 70.70 -70.70 0.00 39.24 53.57 23.68 116.49 15.19 35 2022-07-05 918 Actual 23.00 18.06 78.5 1.706 2.966
8 2022-05-31 2022-06-21 70.70 99.67 -99.67 0.00 22.18 30.25 18.27 70.70 9.22 27 2022-05-31 895 Estimate 13.00 10.20 78.5 1.706 2.966
44 2022-05-05 2022-05-26 99.67 58.61 -58.61 0.00 32.41 44.25 23.01 99.67 13.00 34 2022-05-04 882 Actual 19.00 14.92 78.5 1.706 2.966
30 2022-03-31 2022-04-21 58.61 80.16 -80.16 0.00 17.06 23.28 18.27 58.61 7.64 27 2022-03-31 863 Estimate 10.00 7.85 78.5 1.706 2.966
23 2022-03-07 2022-03-28 80.16 42.46 -42.46 0.00 23.88 32.60 23.68 80.16 10.46 35 2022-03-04 853 Actual 14.00 10.99 78.5 1.706 2.966
4 2022-01-28 2022-02-18 42.46 74.80 -74.80 0.00 11.94 16.31 14.21 42.46 5.54 21 2022-01-28 839 Estimate 7.00 5.50 78.5 1.706 2.966
55 2022-01-10 2022-02-01 74.80 63.35 -63.35 0.00 20.47 27.94 26.39 74.80 9.76 39 2022-01-07 832 Actual 12.00 9.42 78.5 1.706 2.966
22 2021-11-29 2021-12-20 63.35 66.73 -66.73 0.00 18.77 25.63 18.95 63.35 8.26 28 2021-11-29 820 Estimate 11.00 8.64 78.5 1.706 2.966
36 2021-11-02 2021-11-23 66.73 72.73 -72.73 0.00 18.77 25.63 22.33 66.73 8.70 33 2021-11-01 809 Actual 11.00 8.64 78.5 1.706 2.966
46 2021-09-29 2021-10-20 72.73 74.08 -74.08 0.00 22.18 30.25 20.30 72.73 9.49 30 2021-09-29 798 Estimate 13.00 10.20 78.5 1.706 2.966
63 2021-08-31 2021-09-21 74.08 67.98 -67.98 0.00 22.18 30.25 21.65 74.08 9.66 32 2021-08-30 785 Estimate 13.00 10.20 78.5 1.706 2.966
10 2021-07-29 2021-08-19 67.98 80.57 -80.57 0.00 20.72 28.27 18.99 67.98 8.87 30 2021-07-29 772 Estimate 13.00 10.20 78.5 1.594 2.772
27 2021-06-30 2021-07-21 80.57 51.64 -51.64 0.00 25.50 34.82 20.25 80.57 10.51 32 2021-06-29 759 Actual 16.00 12.56 78.5 1.594 2.772
16 2021-05-28 2021-06-18 51.64 57.32 -57.32 0.00 14.35 19.57 17.72 51.64 6.74 28 2021-05-28 743 Estimate 9.00 7.06 78.5 1.594 2.772
34 2021-05-03 2021-05-24 57.32 61.73 -61.73 0.00 15.94 21.76 19.62 57.32 7.48 31 2021-04-30 734 Actual 10.00 7.85 78.5 1.594 2.772
1 2021-03-30 2021-04-20 61.73 51.64 -51.64 0.00 17.53 23.95 20.25 61.73 8.05 32 2021-03-30 724 Estimate 11.00 8.64 78.5 1.594 2.772
50 2021-03-01 2021-03-22 51.64 56.05 -56.05 0.00 14.35 19.57 17.72 51.64 6.74 28 2021-02-26 713 Actual 9.00 7.06 78.5 1.594 2.772
31 2021-01-29 2021-02-19 56.05 61.10 -61.10 0.00 15.94 21.76 18.35 56.05 7.31 29 2021-01-29 704 Estimate 10.00 7.85 78.5 1.594 2.772
15 2021-01-05 2021-01-26 61.10 61.10 -61.10 0.00 17.53 23.95 19.62 61.10 7.97 31 2020-12-31 694 Actual 11.00 8.64 78.5 1.594 2.772
18 2020-11-30 2020-12-21 61.10 64.23 -64.23 0.00 17.53 23.95 19.62 61.10 7.97 31 2020-11-30 683 Estimate 11.00 8.64 78.5 1.594 2.772
29 2020-11-02 2020-11-23 64.23 58.58 -58.58 0.00 19.13 26.11 18.99 64.23 8.38 30 2020-10-30 672 Actual 12.00 9.42 78.5 1.594 2.772
56 2020-09-30 2020-10-21 58.58 48.51 -48.51 0.00 15.94 21.76 20.88 58.58 7.64 33 2020-09-30 660 Estimate 10.00 7.85 78.5 1.594 2.772
7 2020-08-31 2020-09-21 48.51 56.69 -56.69 0.00 12.75 17.41 18.35 48.51 6.33 29 2020-08-28 650 Actual 8.00 6.28 78.5 1.594 2.772
49 2020-07-30 2020-08-20 56.69 49.15 -49.15 0.00 15.94 21.76 18.99 56.69 7.39 30 2020-07-30 642 Estimate 10.00 7.85 78.5 1.594 2.772
14 2020-07-01 2020-07-22 49.15 62.01 -62.01 0.00 12.44 16.98 19.73 49.15 6.41 32 2020-06-30 632 Actual 8.00 6.28 78.5 1.555 2.704
12 2020-05-29 2020-06-19 62.01 63.24 -63.24 0.00 18.66 25.47 17.88 62.01 8.09 29 2020-05-29 624 Estimate 12.00 9.42 78.5 1.555 2.704
20 2020-05-01 2020-05-22 63.24 70.60 -70.60 0.00 18.66 25.47 19.11 63.24 8.25 31 2020-04-30 612 Actual 12.00 9.42 78.5 1.555 2.704
6 2020-03-30 2020-04-20 70.60 57.72 -57.72 0.00 21.77 29.72 19.11 70.60 9.21 31 2020-03-30 600 Estimate 14.00 10.99 78.5 1.555 2.704
26 2020-03-02 2020-03-23 57.72 74.28 -74.28 0.00 17.10 23.36 17.26 57.72 7.53 28 2020-02-28 586 Actual 11.00 8.64 78.5 1.555 2.704
53 2020-01-31 2020-02-21 74.28 93.28 -93.28 0.00 23.32 31.85 19.11 74.28 9.69 31 2020-01-31 575 Estimate 15.00 11.78 78.5 1.555 2.704
59 2020-01-03 2020-01-24 93.28 54.66 -54.66 0.00 31.10 42.45 19.73 93.28 12.17 32 2019-12-31 560 Actual 20.00 15.70 78.5 1.555 2.704
32 2019-12-05 2019-12-30 54.66 0.00 0.00 0.00 15.55 21.23 17.88 54.66 7.13 29 2019-11-29 540 Estimate 10.00 7.85 78.5 1.555 2.704
35 2019-11-05 2019-11-26 52.23 66.94 -66.94 0.00 14.00 19.12 19.11 52.23 6.81 31 2019-10-31 530 Actual 9.00 7.07 78.5 1.555 2.704
39 2019-10-02 2019-10-23 66.94 58.96 -58.96 0.00 20.22 27.61 19.11 66.94 8.73 31 2019-09-30 521 Estimate 13.00 10.21 78.5 1.555 2.704
33 2019-09-04 2019-09-25 58.96 73.67 -73.67 0.00 17.11 23.36 18.49 58.96 7.69 30 2019-08-30 508 Actual 11.00 8.64 78.5 1.555 2.704
25 2019-08-02 2019-08-23 73.67 79.33 -79.33 0.00 23.33 31.85 18.49 73.67 9.61 30 2019-07-31 497 Estimate 15.00 11.78 78.5 1.555 2.704
37 2019-07-04 2019-07-25 79.33 67.93 -67.93 0.00 25.81 34.98 18.54 79.33 10.35 31 2019-07-01 482 Actual 16.45 0.55 78.5 1.517 2.618
17 2019-06-05 2019-06-26 67.93 76.86 -76.86 0.00 21.24 28.77 17.92 67.93 8.86 30 2019-05-31 465 Estimate 14.00 10.99 78.5 1.517 2.618
In [3]:
px.scatter(df, x="Reading date", y="Reading value (kL)", color="Reading type", title="Reading values (kL)", trendline="ols")
In [4]:
px.bar(df, x="Reading date", y="Water consumption (kL)")
In [5]:
df["Average daily usage (L)"] = df["Water consumption (kL)"] * 1000 / df["Consumption period (days)"]
px.bar(df, x="Reading date", y="Average daily usage (L)")
In [6]:
px.bar(df, x="Reading date", y="Balance of current charges ($)")
In [7]:
px.line(df, x="Reading date", y=["Water unit rate ($/kL)", "Wastewater unit rate ($/kL)"], title="Unit rates ($/kL)")